import time
import json
import os
import datetime


# Fetch company detail information
from TycDetailParse.spiders.parseinfo import bs_parse_info

# Using a company URL read from the MySQL database, fetch the detail page and store it locally
def get_company_info(driver, company_url):
    """Download a company detail page, save it to disk, and parse it.

    The page HTML is written to ``./<YYYY-MM-DD>/<company_id>.html`` (relative
    to the current working directory, using today's local date) and the same
    folder and id are then handed to ``bs_parse_info.get_company_info``.

    Args:
        driver: Browser driver object exposing ``get(url)`` and a
            ``page_source`` attribute (presumably a Selenium WebDriver --
            confirm against the caller).
        company_url: URL of the company detail page. The company id is taken
            from the fifth ``/``-separated segment, e.g. ``16060`` in
            ``https://host/company/16060``.

    Raises:
        IndexError: If ``company_url`` has fewer than five ``/``-separated
            segments.
    """
    company_id = company_url.split('/')[4]

    driver.get(company_url)
    html = driver.page_source

    date = datetime.datetime.now().strftime('%Y-%m-%d')
    folder_path = './' + date + '/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)

    # NOTE(review): this pause happens after page_source has already been
    # captured, so it does not give the page extra time to render -- confirm
    # whether it is meant purely as request throttling.
    time.sleep(1)

    # The context manager closes the file even if the write fails.
    with open(folder_path + company_id + ".html", "w", encoding='utf-8') as html_file:
        html_file.write(html)

    bs_parse_info.get_company_info(folder_path, company_id)

    # Pause between companies before the caller moves on to the next URL.
    time.sleep(2)


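# Test how the data is parsed by reading a company detail HTML file from a local folder
# date_time has the format 2018-12-04
# company_id is a value that has already been stored locally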
def get_company_info_for_debug(date_time, company_id):
    """Run the parser against a previously saved company detail page.

    Builds the relative folder path ``../../<date_time>/`` and passes it,
    together with ``company_id``, to ``bs_parse_info.get_company_info``.
    Nothing is downloaded and the folder is not created here.

    Args:
        date_time: Name of the dated folder, formatted like ``2018-12-04``.
        company_id: Id of a company whose page has already been stored locally.
    """
    saved_pages_dir = ''.join(('../../', date_time, '/'))
    bs_parse_info.get_company_info(saved_pages_dir, company_id)
    time.sleep(2)

if __name__ == "__main__":
    # Manual debug run: parse the saved page of company 16060 from the
    # 2018-12-04 folder.
    get_company_info_for_debug(date_time="2018-12-04", company_id="16060")

